# get the ascii data from our online archives
# "names" defined here are the set of country 
# acronyms we are going to use later.
states.wg<-read.csv("http://staff.washington.edu/xuncao/Week_Two/Trade_Ward_Gled.csv")
states.wg$Name<-as.character(states.wg$Name)
names<-sort(c("USR",as.character(unique(states.wg$GW)))) 
length.st<-length(unique(states.wg$GW))+1

# monadic effects first: 
pop     <-dget("http://staff.washington.edu/xuncao/Week_Two/Pop")/1000 # in 1000,000s
gdp     <-dget("http://staff.washington.edu/xuncao/Week_Two/GDP")/1000000000      # in billions$
polity  <-dget("http://staff.washington.edu/xuncao/Week_Two/polity")   # -10 to 10

# create nomadic covariates
s.r<-as.data.frame(na.omit(cbind(as.data.frame(pop)$"1985",as.data.frame(gdp)$"1985", as.data.frame(polity)$"1985", names)))
colnames(s.r)<-c("sr.pop","sr.gdp","sr.polity","sr.names")
rownames(s.r)<-as.character(s.r$sr.names)
s.r$sr.pop<-(as.numeric(as.character(s.r$sr.pop))) # in million;
s.r$sr.gdp<-(as.numeric(as.character(s.r$sr.gdp))) # in billion;
s.r$sr.gdp<-(s.r$sr.gdp)/(s.r$sr.pop) # this is in $ 1000
s.r$sr.polity<-(as.numeric(as.character(s.r$sr.polity))) # -10 to 10;
s.r$sr.names<-as.character(s.r$sr.names)
colnames(s.r)<-c("sr.pop","sr.gdpcap","sr.polity","sr.names")

# coerce into matrices, Xs and Xr
Xs<-as.matrix(s.r[,c(1,2,3)])
Xr<-as.matrix(s.r[,c(1,2,3)]) 

rownames(Xs)<-as.character(s.r$sr.names)
rownames(Xr)<-as.character(s.r$sr.names)

n<-length(rownames(Xr)) # in the year 1985, n is 137 


### Yd second: 
# make it binary ==> an asymmetric matrix with 1 noticing there is a MID and 0 not a dispute;
mid.85  <-dget("mid.1985")  
Yd<-mid.85[rownames(Xr), rownames(Xr)]
Yd[Yd>1]<-1
diag(Yd)<-0
sum(Yd)



### Dyadic part finally: 
# JointDemo first:
mat<-matrix(0, ncol=n, nrow=n)
colnames(mat)<-rownames(Xr)
rownames(mat)<-rownames(Xr)
for (i in 1:n){
for (j in 1:n){if (Xs[,3][i]>6 & Xs[,3][j]>6) {mat[i,j]<-1}
                       }}
diag(mat)<-0
JointDemo.85<-mat # this has large amount of NAs;

polity  <-dget("http://staff.washington.edu/xuncao/Week_Two/polity")   # -10 to 10
mat<-matrix(NA, ncol=length.st, nrow=length.st)
colnames(mat)<-names
rownames(mat)<-names
for (i in 1:length.st){
for (j in 1:length.st){mat[i,j]<-as.numeric(polity[,"1985"][i])*as.numeric(polity[,"1985"][j])
                       }}
pol.sim.85<-mat # this has large amount of NAs;
pol.sim.85<-pol.sim.85[rownames(Xr), rownames(Xr)]
diag(pol.sim.85)<-0

# economic interdependence: 
imp.85  <-dget("http://staff.washington.edu/xuncao/Week_Two/1985/imp.1985")  # million $
exp.85  <-dget("http://staff.washington.edu/xuncao/Week_Two/1985/exp.1985")  # million $
imp.85 <-imp.85[rownames(Xr), rownames(Xr)]
exp.85 <-exp.85[rownames(Xr), rownames(Xr)]
inter.85<-exp.85+imp.85
diag(inter.85)<-0

for (i in 1: dim(inter.85)[1])
            {temp<-(10^3)*(inter.85[i,]*10^6)/(gdp[rownames(Xr),"1985"][i]*10^9) # here sum of the import and export as 10 times percentage of the GDP;
             inter.85[i,]<-temp}
Inter.85<-round(inter.85, digits=3)
InterGap.85<-round(abs(inter.85-t(inter.85)), digits=3)

# shared IGO:
igo.85  <-dget("http://staff.washington.edu/xuncao/Week_Two/1985/igo.1985") # counts of shared igos, 0,..., 60
igo.85  <-igo.85[rownames(Xr), rownames(Xr)]
# these are the counts, for this year, the average is about 24;
# It is a surprise to see that average shared IGO membership is that high; 
# I think we need to further differentiate different sort of IGOs. 

# control: 
distance<-dget("http://staff.washington.edu/xuncao/Week_Two/distance") # 1000 km
distance<-distance[rownames(Xr), rownames(Xr)]
diag(distance)<-0
# there are 14 NAs, They are HSD, HSE, MEC, MOD, OFS, PAP, PMA, SAX, SIC, TBT, TRA, TUS, UPC, WRT;
# BUT THESE ARE COUNTRIES NON-EXISTENT IN THE PERIOD WE STUDY.

# now politically relevant dyads: 
mat<-matrix(0, ncol=length.st, nrow=length.st)
colnames(mat)<-names
rownames(mat)<-names

mat[,"USA"]<-1;mat["USA",]<-1
mat[,"USR"]<-1;mat["USR",]<-1

continue<-read.table("C:/XunCao/Research/Data/Direct_Contiguity_V3-0/Dyadic Direct Contiguity.txt", sep=",", header=T)
con.85<-subset(continue, continue$Year==1985)
con.85$State1Ab<-as.character(con.85$State1Ab)
con.85$State2Ab<-as.character(con.85$State2Ab)
con.85<-subset(con.85, con.85$State1Ab!="YAR")
con.85<-subset(con.85, con.85$State2Ab!="YAR")
##### there is one "YAR" not in our data. Need to work on this later!!!!

for (i in c(1:dim(con.85)[1])){mat[con.85$State1Ab[i],con.85$State2Ab[i]]<-1
                            mat[con.85$State2Ab[i],con.85$State1Ab[i]]<-1}
diag(mat)<-0
relevant<-mat[rownames(Xr), rownames(Xr)]
sum(relevant)
# [1] 1232
sum(Yd)
# [1] 112

#    State1No State1Ab State2No State2Ab Year ContType Version
# 66         2      USA       20      CAN 1985        1       3
# 94         2      USA       31      BHM 1985        4       3
# 191        2      USA       40      CUB 1985        4       3
# 361        2      USA       70      MEX 1985        1       3



xd<-list(pol.sim.85, Inter.85, igo.85, relevant, distance)
all.1985<-NULL
for (i in 1:length(xd)) {
                        for (j in 1:dim(xd[[i]])[1]){all.1985<-c(all.1985,xd[[i]][,j])}
                        }
Xd<-array(all.1985, dim=c(dim(xd[[i]])[1], dim(xd[[i]])[1], length(xd)))
dimnames(Xd)<-list(rownames(Xr),rownames(Xr),c("pol.sim.85", "Inter.85", "igo.85", "relevant","distance"))

# Xd, Xr, Xs must not contain missing data
# Yd may contain missing data; it is imputed if model is Gaussian.

# write to a file, after creating list of all necessary data
data.1985<-list(Yd,Xd,Xs,Xr)
data.1985<-dump("data.1985","data1985")

# read in program to analyze the data
source("http://www.stat.washington.edu/hoff/Code/GBME/gbme.r")
n<-dim(Yd)[1]
Y<-Yd

# call the main estimation package, gbme
gbme(Y=Yd,Xd=Xd,Xs=Xs,Xr=Xr,k=3,fam="binomial",N=matrix(1,n,n),NS=1000000,awrite=T,bwrite=T, directed=T) 
